This library has been created to enable users to view examples of data visualisations utilising the NHS themes, using NHS public data sets. Users can load the required packages and create the sample data sets, and then choose which data visualisations they would like to run in their own environment.

The foundations of this library are sourced from a GGplot guide by Mike Perham. This has been expanded as a proof of concept for working collaboratively over regions to input and build upon, becoming the R Data Viz Library.

For more information about each data visualisation type, the below are recommended to review:

The Data Visualisation Catalogue: https://datavizcatalogue.com/index.html

Install/load required packages

if (!require("pacman")) install.packages("pacman"); library(pacman)

pacman::p_load(Rcpp, tidyverse,dplyr,tidyr,
               ggplot2,ggthemes,ggtext,scales,
               png,ggalt,NHSRdatasets,onsr,shinycssloaders, plotly, networkD3, FunnelPlotR, NHSRplotthedots,remotes,devtools,DT)

# Install some packages directly from GitHub
remotes::install_github("rOpenSci/fingertipsR",
                        build_vignettes = TRUE,
                        dependencies = "suggests",
                        build = F)

devtools::install_github("ricardo-bion/ggradar") 
library(ggradar)

Load sample data

All of the examples in this document use A&E dummy data from the NHSRdatasets package for NHS reporting, fingertips data for Public Health and ONS data for population data. These give us broad datasets that can be used for different data visualisation types.

More information on these packages can be found here:

NHSRdatasets:

https://github.com/nhs-r-community/NHSRdatasets

https://nhs-r-community.github.io/NHSRdatasets/

#Load initial dataset and clean up
Attends_1 <- NHSRdatasets::ae_attendances 
Attends_1$org_code <- as.character(Attends_1$org_code)
Attends <- Attends_1 %>%
  filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
           org_code == "RTK"|org_code == "RA2") %>%
  filter(type ==1) %>%
  select(-c(3,5))

Fingertips:

https://github.com/ropensci/fingertipsR

The below gets population by sex.

# Load Fingertips R
library(fingertipsR)

# Get available profiles in fingertips
profiles_data <- profiles()
print(profiles_data)
## # A tibble: 246 × 4
##    ProfileID ProfileName                        DomainID DomainName             
##        <int> <chr>                                 <int> <chr>                  
##  1        18 Smoking Profile                  1938132885 Key indicators         
##  2        18 Smoking Profile                  1938132886 Smoking prevalence in …
##  3        18 Smoking Profile                  1938132900 Smoking prevalence in …
##  4        18 Smoking Profile                  1938132887 Smoking related mortal…
##  5        18 Smoking Profile                  1938132888 Smoking related ill he…
##  6        18 Smoking Profile                  1938132889 Impact of smoking      
##  7        18 Smoking Profile                  1938132890 Smoking quitters       
##  8        19 Public Health Outcomes Framework    1000049 A. Overarching indicat…
##  9        19 Public Health Outcomes Framework    1000041 B. Wider determinants …
## 10        19 Public Health Outcomes Framework    1000042 C. Health improvement  
## # ℹ 236 more rows
# Find profiles related to population
population_profiles <- profiles_data %>%
  filter(grepl("population", ProfileName, ignore.case = TRUE))
print(population_profiles)
## # A tibble: 1 × 4
##   ProfileID ProfileName   DomainID DomainName 
##       <int> <chr>            <int> <chr>      
## 1       132 Populations 1938133081 Populations
# Search for indicators related to population structure, DomainID was found by viewing the population_profiles and selecting a DomainID
population_indicators <- indicator_metadata(DomainID = "1938133081") #Replace with DomainID from above

# Get the relevant indicator of the measure from your list of population_indicators
indicator_id <- 92708  # Replace with the actual indicator ID from the results above

#Get list of area types
area_types_data <- area_types()

# Get the data for the specific indicator
population_data <- fingertips_data(IndicatorID = indicator_id,AreaTypeID = "15") #Note for Area Types you can select all, but it will take a long time. 15 is England.

# Filter data to include only relevant columns and non-NA values
population_data_filtered <- population_data %>%
  filter(!is.na(Age), !is.na(Value)) %>%

  filter(Timeperiod == "2022")  %>%  # Alistair's addtion

  select(AreaName, Sex, Age, Value)

# Adjust the values for plotting (male values negative for pyramid structure)
population_data_filtered <- population_data_filtered %>%
  mutate(Value = ifelse(Sex == "Male", -Value, Value)) %>%
  filter(Age != "All ages") %>%
  filter(Sex != "Persons")

# Convert the Age column to a factor and specify the levels in the desired order
population_data_filtered$Age <- factor(population_data_filtered$Age, levels = c("0-4 yrs", "5-9 yrs", "10-14 yrs", "15-19 yrs", "20-24 yrs", "25-29 yrs", "30-34 yrs", "35-39 yrs", "40-44 yrs", "45-49 yrs", "50-54 yrs", "55-59 yrs", "60-64 yrs", "65-69 yrs", "70-74 yrs", "75-79 yrs", "80-84 yrs", "85-89 yrs","90+ yrs"))

ONS:

https://medium.com/@VickyCrockett1/how-do-you-get-data-into-r-from-the-ons-c860043fef8c

The next step loads the data and performs some simple filtering steps.

NHSR Theme

All of the examples in this document use dummy data from the NHSRdatasets package (more information on this package can be found here: https://github.com/nhs-r-community/NHSRtheme). As the package is not in CRAN, you need to use devtools to load the package from github.

## cli        (3.6.2  -> 3.6.3 ) [CRAN]
## digest     (0.6.35 -> 0.6.37) [CRAN]
## rlang      (1.1.3  -> 1.1.4 ) [CRAN]
## yaml       (2.3.8  -> 2.3.10) [CRAN]
## xfun       (0.44   -> 0.47  ) [CRAN]
## tinytex    (0.51   -> 0.52  ) [CRAN]
## knitr      (1.47   -> 1.48  ) [CRAN]
## bslib      (0.7.0  -> 0.8.0 ) [CRAN]
## Rcpp       (1.0.12 -> 1.0.13) [CRAN]
## colorspace (2.1-0  -> 2.1-1 ) [CRAN]
## rmarkdown  (2.27   -> 2.28  ) [CRAN]
## servr      (NA     -> 0.31  ) [CRAN]
## xaringan   (NA     -> 0.30  ) [CRAN]
## package 'cli' successfully unpacked and MD5 sums checked
## package 'digest' successfully unpacked and MD5 sums checked
## package 'rlang' successfully unpacked and MD5 sums checked
## package 'yaml' successfully unpacked and MD5 sums checked
## package 'xfun' successfully unpacked and MD5 sums checked
## package 'tinytex' successfully unpacked and MD5 sums checked
## package 'knitr' successfully unpacked and MD5 sums checked
## package 'bslib' successfully unpacked and MD5 sums checked
## package 'colorspace' successfully unpacked and MD5 sums checked
## package 'rmarkdown' successfully unpacked and MD5 sums checked
## package 'servr' successfully unpacked and MD5 sums checked
## package 'xaringan' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  D:\TEMP\RtmpecgK96\downloaded_packages
## ── R CMD build ─────────────────────────────────────────────────────────────────
##          checking for file 'D:\TEMP\RtmpecgK96\remotes4708284776f5\nhs-r-community-NHSRtheme-4829355/DESCRIPTION' ...  ✔  checking for file 'D:\TEMP\RtmpecgK96\remotes4708284776f5\nhs-r-community-NHSRtheme-4829355/DESCRIPTION' (364ms)
##       ─  preparing 'NHSRtheme': (784ms)
##    checking DESCRIPTION meta-information ...     checking DESCRIPTION meta-information ...   ✔  checking DESCRIPTION meta-information
##       ─  checking for LF line-endings in source and make files and shell scripts (427ms)
##   ─  checking for empty or unneeded directories
##      Omitted 'LazyData' from DESCRIPTION
##       ─  building 'NHSRtheme_0.1.0.tar.gz'
##      
## 
##   DarkBlue       Blue BrightBlue  LightBlue   AquaBlue      Black   DarkGrey 
##  "#003087"  "#005EB8"  "#0072CE"  "#41B6E6"  "#00A9CE"  "#231f20"  "#425563" 
##    MidGrey   PaleGrey  DarkGreen      Green LightGreen  AquaGreen     Purple 
##  "#768692"  "#E8EDEE"  "#006747"  "#009639"  "#78BE20"  "#00A499"  "#330072" 
##   DarkPink       Pink    DarkRed        Red     Orange WarmYellow     Yellow 
##  "#7C2855"  "#AE2573"  "#8A1538"  "#DA291C"  "#ED8B00"  "#FFB81C"  "#FAE100"

Data Over Time

Basic line chart

ggplot

# Filter initial dataset
line_df <- Attends %>%
  filter(org_code=="RXQ")

# Make plot
ggplot(line_df, aes(x = period, y = attendances)) +
  geom_line(colour = "#005EB8", size = 1.5) +
  scale_y_continuous(labels = comma) +
  labs(title="Type 1 attendances - Bucks Healthcare",
       subtitle = "April 2016 to March 2019",
       y = "Attendances",
       x = "Month") +
  expand_limits(y = 0)

plotly

# Filter initial dataset
line_df <- Attends %>%
  filter(org_code=="RXQ")

# Order the data frame by the date in ascending order - required for the chart to plot properly
line_df <- line_df[order(line_df$period),] 

# Plotly Chart
plot_ly(line_df, x = ~period, y = ~attendances, type = 'scatter', mode = 'lines', 
            line = list(color = "#005EB8", width = 3, dash = 'line'))  %>%
  
   layout(title = "Type 1 attendances - Bucks Healthcare", # Set titles and axis labels
         xaxis = list (title = "Month"),
         yaxis = list (title ="Attendances", range = c(0, max(line_df$attendances) * 1.1))) %>%  # Align the y axis scales with the ggplot chart
  
  config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",   # Remove any unnecessary functions of plotly chart
                                    "pan2d", "autoScale2d", "resetScale2d", "zoom2d"))

Multiple line chart

ggplot

#Filter initial dataset
multiple_line_df <- Attends %>%
  filter(org_code == "RXQ" | org_code=="RTH") 

#Make plot
ggplot(multiple_line_df,
         aes(x = period, y = attendances, colour = org_code)) +
  geom_line(size = 1) +
  geom_point() +
  scale_colour_manual(values = c("#005EB8", "#41B6E6")) +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Type 1 attendances - Bucks Healthcare vs Royal Berkshire",
    subtitle = "April 2016 to March 2019",
    y = "Attendances",
    x = "Month"
  ) +
  expand_limits(y = 0) +
  theme(legend.title = element_blank())

plotly

#Filter initial dataset for one trust
multiple_line_rxq <- Attends %>%
  filter(org_code == "RXQ") %>%
  rename(attendrxq=attendances)

#Filter initial dataset for another trust
multiple_line_rth <- Attends %>%
  filter(org_code=="RTH") %>%
  rename(attend_rth=attendances) 

# Remove a couple of colums from both dataframes
line_rth = subset(multiple_line_rth, select = -c(admissions, org_code))
line_rxq = subset(multiple_line_rxq, select = -c(admissions, org_code))

# Combine data frames
multiple_line_data <-
  merge(line_rth, line_rxq)

# Create plotly chart
plot_ly(multiple_line_data, x = ~period, y = ~multiple_line_data$attend_rth, name = 'RTH', type = 'scatter', mode = 'lines',  

        line = list(color = 'rgb(118, 134)', width = 3, dash = 'dot'))  %>% 
  
  add_trace(y = ~multiple_line_data$attendrxq, name = 'RXQ', line = list(color = 'blue', width = 3, dash = 'solid'))%>%  ##Plot a second line in NHS blue
  
  layout(title = 'Type 1 attendances - Bucks Healthcare vs Royal Berkshire', # Set titles and axis labels
         xaxis = list (title = "Time Period"),
         yaxis = list (title ="Attendances", range = c(0, max(multiple_line_data$attend_rth) * 1.1))) %>%  # Align the y axis scales with the ggplot chart

  config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",   # Remove any unnecessary functions of plotly chart
                                    "pan2d", "autoScale2d", "resetScale2d", "zoom2d"))

SPC Charts

For more info see:- https://github.com/nhs-r-community/NHSRplotthedots

# Create a ggplot SPC using the NHSRplotthedots package

# Filter and format the data
sub_set <- ae_attendances %>%
  filter(org_code == "RQM", type == 1, period < as.Date("2018-04-01"))

# Plot the chart
sub_set %>%
  ptd_spc(value_field = breaches, date_field = period, improvement_direction = "decrease")

Comparisons

Simple bar chart

ggplot

# Filter initial dataset
bar_df <- Attends %>%
  filter(period == "2019-03-01")

# Make plot
bar <- ggplot(bar_df, aes(x = org_code, y = attendances)) +
  geom_bar(stat = "identity",
           position = "identity",
           fill = "#005EB8") +
  geom_hline(yintercept = 0,
             size = 1,
             colour = "#333333") +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Type 1 attendances",
    subtitle = "March 2019",
    y = "Attendances",
    x = "Provider Code"
  )

plot(bar)

Add labels

The code below adds labels to your simple bar chart.

#Filter initial dataset
bar + geom_text(aes(label = scales::comma(attendances)), vjust =2, color= "White")

plotly

# Filter the data to select just the most recent date
simple_bar_data <- Attends %>% filter(period == "2019-03-01")   

#--------------------------------
# Plot the Chart

## Set bar colours
simple_bar_blue <- plot_ly(simple_bar_data, x = ~org_code, y = ~attendances, type = "bar", colors = '#005EB8') %>%

## Set titles and axis labels    
  layout(title = list(text = '<b>Type 1 attendances</b><br><sup>March 2019</sup>', x = 0, xanchor= 'left'),                           
         xaxis = list(title = "Provider Code"),
         yaxis = list(title = "Attendances"),
         legend = list() ) %>%
  
## Remove any unnecessary functions of plotly chart
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",   
                                    "pan2d", "autoScale2d", "resetScale2d", "zoom2d"))

simple_bar_blue

Add labels

The code below adds labels to your simple bar chart.

#Plot the Chart

## Set bar colours
simple_bar_blue <- plot_ly(simple_bar_data, x = ~org_code, y = ~attendances, type = "bar", text = ~attendances, textposition = 'auto', marker = list(color = '#005EB8', line = list(colour ='#FFFFFF'), width = 1.5)) %>%

## Set titles and axis labels    
  layout(title = list(text = '<b>Type 1 attendances</b><br><sup>March 2019</sup>', x = 0, xanchor= 'left'),                           
         xaxis = list(title = "Provider Code"),
         yaxis = list(title = "Attendances"),
         legend = list() ) %>%
  
## Remove any unnecessary functions of plotly chart
config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",   
                                    "pan2d", "autoScale2d", "resetScale2d", "zoom2d"))

simple_bar_blue

Grouped bar chart

ggplot

# Filter initial dataset
grouped_bar_df <- Attends %>%
  filter(period == "2017-03-01" | period == "2019-03-01") %>%
  select(c(1:3))

# Make plot
ggplot(grouped_bar_df,
       aes(
         x = org_code,
         y = attendances,
         fill = as.factor(period)
       )) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_hline(yintercept = 0,
             size = 1,
             colour = "#333333") +
  scale_y_continuous(labels = comma) +
  #NHSRtheme::scale_fill_nhs('blues')+
  labs(
    title = "Attendances have increased in all providers other than Bucks Healthcare",
    subtitle = "March 2017 vs March 2019",
    y = "Attendances",
    x = "Provider Code"
  ) +
  theme(legend.title = element_blank())

plotly

# Filter initial dataset   
  grouped_bar_df_plotly <- Attends %>%
  filter(period == "2017-03-01" | period == "2019-03-01") %>%
  select(c(1:3))

# Change org_code to a character rather than a factor (needed for plotly x axis to work)
grouped_bar_df_plotly$org_code <- as.character(grouped_bar_df_plotly$org_code)

# Pivot so that org codes appear in different columns to allow grouped columns 
grouped_bar_df_plotly <- pivot_wider(grouped_bar_df_plotly,names_from = period,values_from = attendances)

# Create chart
plot_ly(data = grouped_bar_df_plotly, 
        x = ~org_code, 
        y = ~`2017-03-01`,
        marker = list(color = '#003087'), 
        name = '2017-03-01',
        type = "bar")%>%
    add_trace(data = grouped_bar_df_plotly, 
        x = ~org_code, 
        y = ~`2019-03-01`,
        name = '2019-03-01',
        marker = list(color = '#0072CE'))%>%
  
  ## Set titles and axis labels (note that there is no specific subtitle function in plotly so handled via annotations)
    layout(title = 'Attendances have increased in all providers other than Bucks Healthcare<br><sub>March 2017 vs March 2019</sub>',  
      xaxis = list(title = "Provider code"),
      yaxis = list(title = "Attendances"),
      barmode = 'group')%>%

config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage","pan2d", "autoScale2d", "resetScale2d", "zoom2d"))

Stacked bar chart

ggplot

AttendsAll <- NHSRdatasets::ae_attendances %>%
  filter(
    org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
      org_code == "RTK" | org_code == "RA2"
  ) %>%
  filter(period == '2017-03-01')

ggplot(AttendsAll, aes(fill = type, y = attendances, x = org_code)) +
  geom_bar(position = "stack", stat = "identity") +
  scale_y_continuous(labels = comma) +
  labs(title = "A&E attendances by department type - March 2017",
       y = "Attendances",
       x = "Provider Code") +
  theme(legend.title = element_blank())

plotly

AttendsAll <- NHSRdatasets::ae_attendances %>%
  filter(
    org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
      org_code == "RTK" | org_code == "RA2"
  ) %>%
  filter(period == '2017-03-01')

#Perform any data manipulation needed
AttendsAll$org_code <- as.character(AttendsAll$org_code)
t1 <- AttendsAll %>%
  filter(
    org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
      org_code == "RTK" | org_code == "RA2"
  ) %>%
  filter(period == '2017-03-01')
t1<- t1 %>% pivot_wider(names_from = type, values_from = attendances) 
#--------------------------------
#Plot the Chart
plot_ly(t1, x = ~org_code, y = ~`1`, type = "bar", marker = list(color = '#003087'), name = 'Type 1') %>%  ##Set bar colours
  add_trace(t1, x = ~org_code, y = ~`2`, marker = list(color = '#0072CE'), name = 'Type 2')%>% ## add next stack
  add_trace(t1, x = ~org_code, y = ~other, marker = list(color = '#41B6E6'), name = 'Other')%>% ## add next stack
                          
  layout(title = 'A&E Attendances by department type - March 2017',                           ##set titles and axis labels
         xaxis = list(title = "Organisation"),
         yaxis = list(title = "Attendances"),
         legend = list(),
         barmode = 'stack') %>% 
  
  config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",   ##remove any unnecessary functions of plotly chart
                                    "pan2d", "autoScale2d", "resetScale2d", "zoom2d"))

Percent stacked bar chart

ggplot

AttendsAll <- NHSRdatasets::ae_attendances %>%
  filter(
    org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
      org_code == "RTK" | org_code == "RA2"
  ) %>%
  filter(period == '2017-03-01')

ggplot(AttendsAll, aes(fill = type, y = attendances, x = org_code)) +
  geom_bar(position = "fill", stat = "identity") +
  scale_y_continuous(labels = percent) +
  labs(title = "A&E attendances by department type - March 2017",
       y = "Attendances",
       x = "Provider Code") +
  theme(legend.title = element_blank())

plotly

AttendsAll <- NHSRdatasets::ae_attendances %>%

  filter(
    org_code == "RXQ" | org_code == "RTH" | org_code == "RHW" |
      org_code == "RTK" | org_code == "RA2"
  ) %>%
  filter(period == '2017-03-01') %>% 
  select(period,
         org_code,
         type,
         attendances)

# Change org code to a character rather than factor data type
AttendsAll$org_code <- as.character(AttendsAll$org_code)

# Restructure data so it works for stacked bar chart
Attends_wide <-AttendsAll |> 

  pivot_wider(names_from = type, values_from = attendances, values_fill = 0)

#Create copy with values as percent for charts
Attends_wide_per<- Attends_wide
Attends_wide_per[, -c(1,2)] <- Attends_wide[, -c(1,2)] / rowSums(Attends_wide[, -c(1,2)])

#--------------------------------
#Plot the Chart

plot_ly(Attends_wide_per, x = ~org_code, y = ~`1`, type = "bar", marker = list(color = '#003087'), name = 'Type 1') %>%  ##Set bar colours
  add_trace(Attends_wide_per, x = ~org_code, y = ~`2`, marker = list(color = '#0072CE'), name = 'Type 2')%>% ## add next stack
  add_trace(Attends_wide_per, x = ~org_code, y = ~other, marker = list(color = '#41B6E6'), name = 'Other')%>% ## add next stack
                          
  layout(title = 'Simple Blue Percentage Stacked Bar Chart',                           ##set titles and axis labels
         xaxis = list(title = "Organisation Code"),
         yaxis = list(title = "Attendances March 2017",
                      tickformat = ".0%"),
         legend = list(),
         barmode = 'stack') %>% 
  
  config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",   ##remove any unnecessary functions of plotly chart
                                    "pan2d", "autoScale2d", "resetScale2d", "zoom2d"))

Scatter plot

ggplot

Attends %>%
  ggplot(aes(y=admissions, x = attendances, color = org_code)) +
  geom_point() +
  labs(title = 'Scatter plot - admissions vs attendances',
       x = 'Attendances',
       y = 'Admissions') +
  scale_y_continuous(labels = label_number(suffix = " K", scale = 1e-3)) +
  scale_x_continuous(labels = label_number(suffix = " K", scale = 1e-3)) +
  theme(legend.title=element_blank())

plotly

plot_ly(Attends, x = ~attendances, y = ~admissions, type = 'scatter', mode = 'markers', color = ~org_code,
        hoverinfo = "text", text = ~ paste("Trust:", org_code, "</br></br>",
                                           "Attends:", attendances, "</br>",
                                           "Admits:", admissions, "</br>",
                                           "Date:", period) )  %>% 
  layout(title = 'Scatter plot',                           ##set titles and axis labels
         xaxis = list(title = "Attendances"),
         yaxis = list (title = "Admissions"),
         legend = list()) %>% 
  config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "select2d", "lasso2d", "toImage",   
                                    "pan2d", "autoScale2d", "resetScale2d", "zoom2d"))  ##remove any unnecessary functions

Bubble chart

ggplot

#For this example we are filtering on 5 organisations, type 1 activity & excluding column 3 from the dataframe.
AttendsBub <- NHSRdatasets::ae_attendances %>%
  filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
           org_code == "RTK"|org_code == "RA2")

  # Summarise data for type 1 attendances
  type_1_summary <- AttendsBub %>%
  filter(type == 1) %>%
  group_by(org_code, period) %>%
  summarise(type_1_attendances = sum(attendances, na.rm = TRUE)) %>%
  ungroup()

# Summarise data for non-type 1 attendances
type_other_summary <- AttendsBub %>%
  filter(type == 3|type== 2) %>%
  group_by(org_code, period) %>%
  summarise(type_other_attendances = sum(attendances, na.rm = TRUE)) %>%
  ungroup()

# Summarise total attendances and total admissions
total_summary <- AttendsBub %>%
  group_by(org_code, period) %>%
  summarise(
    total_attendances = sum(attendances, na.rm = TRUE),
    total_breaches = sum(breaches, na.rm = TRUE),
    total_admissions = sum(admissions, na.rm = TRUE)
  ) %>%
  ungroup()

# Merge the summaries into a single data frame
final_summary <- total_summary %>%
  left_join(type_1_summary, by = c("org_code", "period")) %>%
  left_join(type_other_summary, by = c("org_code", "period"))

# Replace NA values with 0 for type_1_attendances and type_3_attendances
final_summary <- final_summary %>%
  mutate(
    type_1_attendances = replace_na(type_1_attendances, 0),
    type_other_attendances = replace_na(type_other_attendances, 0)
  )

# Add percentage columns
final_summary <- final_summary %>%
  mutate(
    perc_admissions_attendances = (total_admissions / total_attendances) * 100,
    perc_type1_attendances_total = (type_1_attendances / total_attendances) * 100,
    perc_breaches_attendances = (total_breaches / total_attendances) * 100
  )

# Filter initial dataset
bubble_df <- final_summary

# Calculate size for bubble chart (proportional to type 1 attendances)
bubble_df <- bubble_df %>%
  mutate(size = perc_type1_attendances_total / max(perc_type1_attendances_total) * 100)

# Create bubble chart
ggplot(bubble_df, aes(x = perc_admissions_attendances, y = perc_breaches_attendances, size = size, color = size)) +
  geom_point(alpha = 0.5) +
  scale_size_continuous(name = "Proportion of type 1") +
  #NHSRtheme::scale_fill_nhs('blues', name = "Proportion of type 1") +
  labs(title = "Bubble Chart of % 4 Hour Breaches vs % converted to admission with % Attendances Type 1 Size",
       x = "Conversion Rate", y = "% 4 Hour Breaches") +
  theme(legend.position = "right")

plotly

#For this example we are filtering on 5 organisations, type 1 activity & excluding column 3 from the dataframe.
AttendsBub <- NHSRdatasets::ae_attendances %>%
  filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
           org_code == "RTK"|org_code == "RA2")

# Summarise data for type 1 attendances
  type_1_summary <- AttendsBub %>%
  filter(type == 1) %>%
  group_by(org_code, period) %>%
  summarise(type_1_attendances = sum(attendances, na.rm = TRUE)) %>%
  ungroup()

# Summarise data for non-type 1 attendances
type_other_summary <- AttendsBub %>%
  filter(type == 3|type== 2) %>%
  group_by(org_code, period) %>%
  summarise(type_other_attendances = sum(attendances, na.rm = TRUE)) %>%
  ungroup()

# Summarise total attendances and total admissions
total_summary <- AttendsBub %>%
  group_by(org_code, period) %>%
  summarise(
    total_attendances = sum(attendances, na.rm = TRUE),
    total_breaches = sum(breaches, na.rm = TRUE),
    total_admissions = sum(admissions, na.rm = TRUE)
  ) %>%
  ungroup()

# Merge the summaries into a single data frame
final_summary <- total_summary %>%
  left_join(type_1_summary, by = c("org_code", "period")) %>%
  left_join(type_other_summary, by = c("org_code", "period"))

# Replace NA values with 0 for type_1_attendances and type_3_attendances
final_summary <- final_summary %>%
  mutate(
    type_1_attendances = replace_na(type_1_attendances, 0),
    type_other_attendances = replace_na(type_other_attendances, 0)
  )

# Add percentage columns
final_summary <- final_summary %>%
  mutate(
    perc_admissions_attendances = (total_admissions / total_attendances) * 100,
    perc_type1_attendances_total = (type_1_attendances / total_attendances) * 100,
    perc_breaches_attendances = (total_breaches / total_attendances) * 100
  )

# Filter initial dataset
bubble_df <- final_summary

# Calculate size for bubble chart (proportional to type 1 attendances)
bubble_df <- bubble_df %>%
  mutate(size = perc_type1_attendances_total / max(perc_type1_attendances_total) * 100)


# Create bubble chart
plot_ly(bubble_df, x = ~perc_admissions_attendances, y = ~perc_breaches_attendances, text = "Proportion of type 1", type = 'scatter', mode = 'markers', color = ~size, colors = 'Blues', size = ~size, sizes = c(5,20),
        marker = list(sizemode = 'diameter', opacity = 0.7)) %>% 
  layout(title = 'Bubble Chart of % 4 Hour Breaches vs \n % converted to admission with % Attendances Type 1 Size',
         xaxis = list(showgrid = FALSE, title = "Conversion Rate"),
         yaxis = list(showgrid = FALSE, title = "% 4 Hour Breaches"))

Population Pyramid chart

ggplot

# Plot the population pyramid
ggplot(population_data_filtered, aes(x = Age, y = Value, fill = Sex)) +
  geom_bar(stat = "identity", position = "identity") +
  coord_flip() +
  scale_y_continuous(labels = function(x) comma(abs(x))) +
  labs(title = "Population Age Profile by Gender",
       x = "Age Group",
       y = "Population Count",
       fill = "Gender") +
  NHSRtheme::scale_fill_nhs("blues")

plotly

population_data_filtered %>% 
  # mutate(population = ifelse(test = gender == "M", yes = -population, no = population)) %>%
  mutate(abs_pop = abs(Value)) %>%
  plot_ly(x= ~Value, y=~Age, color=~Sex) %>% 
  add_bars(orientation = 'h', hoverinfo = 'text', text = ~abs_pop) %>%
  layout(bargap = 0.1, barmode = 'overlay',
         xaxis = list(tickmode = 'array', tickvals = c(-2000000, -1000000, 0, 1000000, 2000000),
                      ticktext = c('2M', '1M', '0', '1M', '2M')))

Radar chart

Further information on ggradar can be found via the GitHub Repository

# Create Example dataset
data <- tibble::tribble(
  ~group, ~Trauma_and_Orthopaedics, ~General_Surgery, ~Gynaecology, ~Ophthalmology, ~Dermatology,
  "Trust 1", 0.8, 0.9, 0.7, 0.6, 0.9,
  "Trust 2", 0.6, 0.7, 0.8, 0.7, 0.6,
  "Trust 3", 0.7, 0.8, 0.9, 0.8, 0.7
)

# Create the radar chart
ggradar(data, 
        values.radar = c("0%", "50%", "100%"), #Sets labels for min, min & max grid lines
        grid.min = 0, grid.mid = 0.5, grid.max = 1, #Defines the values for the grid lines
        group.line.width = 2, #Changes the line width of plotted line
        group.point.size = 3, #Changes the point size of plotted line
        group.colours = c("#00AFBB", "#E7B800", "#FC4E07"), #Colour for plotted lines based on group
        background.circle.colour = "white", #Plot background colour
        gridline.mid.colour = "grey", #Colour of gridlines
        legend.position = "bottom") #Position can be 

Radial Column chart

ggplot

# Create a radial column chart
ggplot(Attends, aes(x = reorder(org_code, -attendances), y = attendances)) +
  geom_col(width = 0.5, fill = "skyblue") +
  coord_polar(start = 0) +
   #NHSRtheme::scale_fill_nhs("blues") +
  scale_y_continuous(labels = function(x) comma(abs(x))) +
  labs(title = "Attendances per Trust",
       x = NULL, y = NULL)

plotly

data <- Attends %>% group_by(org_code) %>%
  summarise(sum_attends = sum(attendances))
data$angle = c(0, 72, 144, 216, 288)

plot_ly(
  type = 'barpolar',
  r = data$sum_attends,
  theta = data$angle
  ) %>%
  layout(
    title = 'Attendances per Trust',
    polar = list(
      radialaxis = list(
        visible = TRUE,
        range = c(0, 500000)
      ),
      angularaxis = list(
        tickmode = 'array',
        tickvals = data$angle,
        ticktext = data$org_code,
        ticklen = 15  # This indirectly adjusts spacing by lengthening tick marks
      ),
      bargap = 0.5,  # This adjusts the gap between bars
      bargroupgap = 0.5  # This adjusts the gap between bar groups
    )
  )

Span chart

ggplot

# Summarize the data
summary_data <- Attends %>%
  group_by(org_code) %>%
  summarise(min_attendance = min(attendances),
            max_attendance = max(attendances))

# Create the horizontal bar range chart
ggplot(summary_data, aes(y = org_code)) +
  geom_linerange(aes(xmin = min_attendance, xmax = max_attendance), color = "blue", size = 1.5) +
  labs(title = "Range of Type 1 Attendances by Trust between 2019 and 2023",
       x = "Number of Attendances",
       y = "Organisation Code") +
   NHSRtheme::scale_fill_nhs("blues")

plotly

# Summarize the data
summary_data_pl <- Attends %>%
  group_by(org_code) %>%
  summarise(min_attendance = min(attendances),
            max_attendance = max(attendances))

# Create the horizontal bar range chart
plot_ly(summary_data_pl, color=I("blue")) %>%
  add_segments(x = ~min_attendance, xend = ~max_attendance, y = ~org_code, yend = ~org_code) %>%
      layout(
      title = "Range of Type 1 Attendances by Trust between 2019 and 2023", 
      xaxis = list(title = "Number of Attendances"),
      yaxis = list(title = "Organisation Code"), #, categoryorder = "category descending"),
      margin = list(l = 65)
    ) %>%
    config(modeBarButtonsToRemove = c(
      "zoomIn2d", "zoomOut2d", "select2d", "lasso2d", # "toImage",
      "pan2d", "autoScale2d", "zoom2d"
    )) # ,"resetScale2d", "hoverClosestCartesian"))

Stacked area chart

ggplot

# Extract the data
AttendsSAC <- NHSRdatasets::ae_attendances %>%

  filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
           org_code == "RTK"|org_code == "RA2")

# Plot the stacked area chart
ggplot(AttendsSAC, aes(x = period, fill = org_code)) +
  geom_area(stat = "count") +  
  NHSRtheme::scale_fill_nhs("blues")

plotly

# Set the colours for the chart
blues <- c("#00A9CE", "#41B6E6", "#0072CE",   "#005EB8","#003087" )

# Extract the data
AttendsSAC <- NHSRdatasets::ae_attendances %>%
  filter(org_code == "RXQ"|org_code=="RTH"|org_code=="RHW"|
           org_code == "RTK"|org_code == "RA2") |>
  # sort by unique code in reverse so the chart is stacked in opposite alphabet order
  mutate(org_code = factor(org_code, levels = rev(sort(unique(org_code)))))

# Plot the stacked area chart 
plot_ly(data = AttendsSAC |> group_by(period, org_code) |>  summarise(count = n()) |> ungroup(),
        x = ~period,
        y = ~count,
        color = ~org_code,
        colors = blues,
        type = 'scatter',
        mode = 'lines',
        stackgroup = 'one',
        line = list(colors = blues),
        stackgroup = 'one') |>
# solid fill colours to replicate the ggplot style
  style(traces = 1, fillcolor = "#00A9CE") |>
  style(traces = 2, fillcolor = "#41B6E6") |>
  style(traces = 3, fillcolor = "#0072CE") |>
  style(traces = 4, fillcolor = "#005EB8") |>
  style(traces = 5, fillcolor = "#003087") |>
  layout(legend = list(title = list(text = "org_code")),
         xaxis = list(tickformat = "%Y",
                      dtick = "M12"))

Creating multiple charts for the same measure

Faceted chart

Use facet_wrap() to create multiple charts split by subgroup in data. You can use ncol = or nrow to specify number of rows or columns. For example, facet_wrap(~org_code, nrow=1) to put all charts in a single row.

ggplot(Attends, aes(x = period, y = attendances)) +
  geom_line(colour = "#005EB8", size = 1) +
  facet_wrap(~org_code)+
  labs(title="Type 1 attendances",
       subtitle = "April 2016 to March 2019") +
  expand_limits(y = 0)

Range

Box & Whisker chart

ggplot

g <- ggplot(Attends, aes(org_code, attendances))
g + geom_boxplot(varwidth=T, fill="light blue") + 
    labs(title="A&E Attendances", 
         subtitle="Distribution by Trust",
         caption="Source: A&E Monthly Stats",
         x="Trust",
         y="A&E attendances")

Bullet chart

ggplot

#Create a sample dataset - replace with your own
name = c("Trust A","Trust B","Trust C","Trust D","Trust E","Trust F","Trust G","Trust H","Trust I","Trust J")
count= c(89,85,76,64,50,45,29,20,10,5)

#Create a dataframe using sample data. Convert name to factor.
data = data.frame(name, count, stringsAsFactors = TRUE)

#Sets the coloured background
bullet_base <- data.frame(rank = c("Poor", "Ok", "Good", "Excellent"),
                          value = c(25, 25, 25, 25))
bullet_base_rep <- 
  do.call("rbind", replicate(nrow(data), bullet_base, simplify = FALSE)) %>%
  mutate(name = sort(rep(data$name, 4) ))

#Colour the background bars
bullet_colors <- c("#e44727", "#e4a727", "#61AB40", "#318100")
names(bullet_colors) <- c("Poor", "Ok", "Good", "Excellent")

#Create plot
ggplot() +
  geom_bar(data = bullet_base_rep, 
           aes(x = name, y = value, fill = rank), stat = "identity",
           position = "stack") +
  geom_bar(data = data, 
           aes(x = name, y = count), fill = "black", width = .2,
           stat = "identity") +
  scale_fill_manual(values = bullet_colors) +
  coord_flip(expand = FALSE)

Funnel Plot

For further information, see the FunnelPlotR GitHub Repository

MyAttends <- NHSRdatasets::ae_attendances %>%
  filter(period == '2017-03-01') %>%
  filter(type ==1) # %>%
  # select(-c(3,5)) 

MyAttends$org_code <- as.character(MyAttends$org_code)

funnel_plot(.data = MyAttends,
            numerator= breaches, #Specify the numerator
            denominator=attendances, #Specify the denominator
            group = org_code, #Specify that we want to plot Trust Names
            title = "A&E breaches", #Specify the chart title
            draw_adjusted = TRUE, #Specify that we want to adjust the control limits to account for over-dispersion
            sr_method = "SHMI", #Specify to adjust for over-dispersion using the CQC Methodology (can also use SHMI)
            # label = "highlight", #Specify that we want to use the 'highlight' argument to show outliers 
            # highlight=HighLight, #Get the highlight argument to reference the list of outlier NEY trusts 
            data_type="PR", #Specify the indicator is a proportion
            limit=95, #Specify to show both 95 and 99.8% control limits 
            multiplier = 100, 
            y_label = "% breaches", #Specify the X Axis Label
            x_label = "No. of attendances") #Specify the Y Axis Label

## A funnel plot object with 139 points of which 13 are outliers. 
## Plot is adjusted for overdispersion.

Gantt chart

ggplot

# Load the dataset
df <- read.csv("https://cdn.rawgit.com/plotly/datasets/master/GanttChart-updated.csv", 
               stringsAsFactors = FALSE)

# Define the start date of each task & convert to date
df$Start <- as.Date(df$Start, format = "%m/%d/%Y")
# Define the end of the task by adding the duration to the start date
df$End <- df$Start + df$Duration

# Prepare the data for plotting
gantt_data <- df %>%
  mutate(task_id = row_number()) %>%
  select(task_id, Task, Start, End, Resource)

# Create the Gantt chart using ggplot2
gantt_chart <- ggplot(gantt_data, aes(x = Start, xend = End, y = reorder(Task, -task_id), yend = reorder(Task, -task_id), color = factor(Resource))) +
  geom_segment(size = 5) +
  labs(title = "Gantt Chart with NHS Blue Shades",
       x = "Date",
       y = "Task") +
  NHSRtheme::theme_nhs() +
  scale_color_manual(values = colorRampPalette(c("#005EB8", "#73C2FB"))(length(unique(gantt_data$Resource))))

# Print the Gantt chart
print(gantt_chart)

Violin chart

ggplot

#Violin Plot
ggplot(Attends, aes(org_code, attendances)) + geom_violin() + 
  labs(title="A&E Attendances", subtitle="Range by Trust", caption="Source: A&E Monthly Statistics", x="Trust", y="Attendances") + scale_fill_brewer(palette="Blues") + theme_classic()

Dumbell chart

ggplot

The ggtext package can be used to add colour to titles or subtitles. You need to ensure that you use it with theme(plot.subtitle = element_markdown(hjust = 0, size = 12)) otherwise it will not work.

#Prepare data
dumbbell_df <- NHSRdatasets::ae_attendances %>%
  filter(type ==1) %>%
  select(-c(3,6)) %>%
  filter(period == "2017-03-01" | period =="2019-03-01") %>%
  mutate(period =as.numeric(format(period,'%Y'))) %>%
  mutate(period = as.character(period)) %>%
    mutate(performance = 1- (breaches/attendances)) %>%
  select(c(1:2,5)) %>%
  spread(period, performance) %>%
  mutate(gap = `2019` - `2017`) %>%
  arrange(desc(gap)) %>%
  head(10)

#Make plot
dumbell <- ggplot(dumbbell_df, aes(x = `2017`, xend = `2019`, y = reorder(org_code, gap), group = org_code)) + 
  geom_dumbbell(colour = "#dddddd",
                size = 3,
                colour_x = "#41B6E6",
                colour_xend = "#005EB8") +
  scale_x_continuous(labels = scales::percent_format(accuracy=1))+
  geom_vline(xintercept = 0.95, size = 1, colour="#333333", linetype = "dashed") +
  labs(title = "Performance improved for all providers",
    subtitle = "<span style='color: #41B6E6;'>March 2017 <span><span style='color: black;'> vs <span><span style='color: #005EB8;'>March 2019<span>") +
  xlab("4 hour performance") +
  ylab("Org code") +
 theme(plot.subtitle = element_markdown(hjust = 0, size = 12))+ theme(legend.position = "none")

plot(dumbell)

Adding annotations

You can use geom_label to add annotations to existing plots or you can add line in when creating ggplot.

dumbell + geom_label(aes(x = 0.9, y = "R1K",label = "Standard"), 
                           hjust = -0.5, 
                           vjust = -0.1, 
                           colour = "#555555",
                           label.size = NA, 
                           family="Arial", 
                           size = 4)

plotly

# Initial data wrangling
dumbbellplotly_df <- dumbbell_df
dumbbellplotly_df$org_code <- as.character(dumbbellplotly_df$org_code)
dumbbellplotly_df$`2017`<- round(dumbbellplotly_df$`2017`*100,1) 
dumbbellplotly_df$`2019`<- round(dumbbellplotly_df$`2019`*100,1)
dumbbellplotly_df$`gap`<- round(dumbbellplotly_df$`gap`*100,1) 

# Region chart  This sets up a blank plotly object to begin with 
 dumbellplotly<- plot_ly(dumbbellplotly_df, color = I("gray80")) 

# It then adds a line for each 'org_code that starts at 2017 and goes through to the 2019 number 
 dumbellplotly <-dumbellplotly %>% add_segments(x = ~dumbbellplotly_df$`2017`, xend = ~dumbbellplotly_df$`2019`, y = ~org_code, yend = ~org_code, showlegend = FALSE)

# It then adds a dot on the 2017 end of the line in red 
dumbellplotly <- dumbellplotly %>% add_markers(x = ~dumbbellplotly_df$`2017`, y = ~org_code, name = "2017", color = I("#AE2573"),size = 8 )

# It then adds a dot on the 2019 end of the line in blue 
dumbellplotly <- dumbellplotly %>% add_markers(x = ~dumbbellplotly_df$`2019`,y = ~org_code, name = "2019", color = I("#0072CE"),size = 8 )

#The titles are then added 
dumbellplotly<- dumbellplotly %>% layout( title = "Performance improved for all providers",   
xaxis = list(title = "4 hour performance", ticksuffix="%"), margin = list(l = 65),
    shapes = list(list(type = "line",
    x0 = 95, 
    x1 = 95,
    y0 = 0,
    y1 = "RXC",
    ref = "x",
     yref = "y",
    line = list(color="#333333",dash = "dash"))),
    annotations = list(
             list(
               x = 95,
               y = "RXC",
               xref = "x",
               yref = "y",
               text = "Standard (95%)",
               showarrow = TRUE,
               arrowhead = 2,
               # ax = 20,
               # ay = -30,
               font = list(color = "555555#")
            )))

dumbellplotly

Movement or Flow

Sankey Diagram

Further information on the sankeyNetwork function can be found on the Rdocumentation website

Sankey_data<- NHSRdatasets::LOS_model %>%   
  group_by(Organisation,Age) %>% 
  summarise(Attends= sum(LOS)) 

Sankey_data<- filter ( Sankey_data, Age >90) %>% 
  arrange(Organisation, Age)

##The first step is to designate nodes this just writes a list of all the Different 'Source' and 'Target' Nodes
nodes <- data.frame(
  name=c(as.character(Sankey_data$Organisation), 
         as.character(Sankey_data$Age)) %>% unique())

# With networkD3,  you need to provide the details of the connection between 'nodes' these must be provided using an 'id'
##, not using real name like in the links dataframe. So we need to reformat it.
Sankey_data$IDsource <- match(Sankey_data$Organisation, nodes$name)-1 
Sankey_data$IDtarget <- match(Sankey_data$Age, nodes$name)-1
Sankey_data<-Sankey_data %>% arrange(IDsource,IDtarget)

##Make the sankey network , Iterations=0  means that the the data is displayed according to how it is sorted in the initial df, starting with the source node 
sankey <- networkD3::sankeyNetwork(Links = as.data.frame.matrix(Sankey_data), Nodes = nodes,
                         Source = "IDsource", Target = "IDtarget",
                         Value =  "Attends", NodeID = "name", sinksRight = TRUE, fontSize = 18,iterations =0)

sankey <- htmlwidgets::prependContent(sankey, htmltools::tags$h4("Attends by Age"))

sankey

Attends by Age